Import Packages

# load packages, installing if missing
if (!require(librarian)){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(
  dismo, dplyr, DT, ggplot2, here, htmltools, leaflet, mapview, purrr, raster, readr, rgbif, rgdal, rJava, sdmpredictors, sf, spocc, tidyr, GGally, caret, pdp, ranger, rpart, rpart.plot, rsample, skimr, vip, maptools, usdm)
select <- dplyr::select # overwrite raster::select
options(readr.show_col_types = FALSE,
        scipen = 999)

# set random seed for reproducibility
set.seed(42)

# directory to store data
dir_data      <- here("data/sdm")
env_stack_grd <- file.path(dir_data, "env_stack.grd")
mdl_maxv_rds  <- file.path(dir_data, "mdl_maxent_vif.rds")

dir.create(dir_data, showWarnings = F)

# read raster stack of environment
#env_stack <- raster::stack(env_stack_grd)

Lab 1a: Exploratory Data Analysis

Get Species Observations

The Species that I will be looking at is the Glaucomys sabrinus, or the northern flying squirrel.

url <- "https://cff2.earth.com/uploads/2017/01/03144442/Glaucomys-sabrinus-coloratus-750x400.jpg"
knitr::include_graphics(url)

obs_csv <- file.path(dir_data, "obs.csv")
obs_geo <- file.path(dir_data, "obs.geojson")
redo    <- FALSE

if (!file.exists(obs_geo) | redo){
  # get species occurrence data from GBIF with coordinates
  (res <- spocc::occ(
    query = 'Glaucomys sabrinus', 
    from = 'gbif', has_coords = T,
    limit = 10000))
  
  # extract data frame from result
  df <- res$gbif$data[[1]]
  readr::write_csv(df, obs_csv)
  
  # convert to points of observation from lon/lat columns in data frame
  obs <- df %>% 
    filter(latitude > 26, longitude > -160) %>%
    sf::st_as_sf(
      coords = c("longitude", "latitude"),
      crs = st_crs(4326)) %>% 
    select(prov, key) # save space (joinable from obs_csv)
  sf::write_sf(obs, obs_geo, delete_dsn=T)
}
obs <- sf::read_sf(obs_geo)
nrow(obs) # number of rows
## [1] 3792
# show points on map
mapview::mapview(obs, map.types = "OpenTopoMap")

Question 1. How many observations total are in GBIF for your species? (Hint: ?occ)

As we can see in the df dataframe there are 3797 observations total in GBIF for my species.

Question 2. Do you see any odd observations, like marine species on land or vice versa? If so, please see the Data Cleaning and explain what you did to fix or remove these points.

Yes, there are some strange observations that are in South America and Africa, when this species should only be seen in North America. So I filtered the dataset by lon and lat so that only oberservations in North America will show up. This leaves us with 3792 observations.

Get Environmetal Data

Presence Points
dir_env <- file.path(dir_data, "env")

# set a default data directory
options(sdmpredictors_datadir = dir_env)

# choosing terrestrial
env_datasets <- sdmpredictors::list_datasets(terrestrial = TRUE, marine = FALSE)

# show table of datasets
env_datasets %>% 
  select(dataset_code, description, citation) %>% 
  DT::datatable()
# choose datasets for a vector
env_datasets_vec <- c("WorldClim", "ENVIREM")

# get layers
env_layers <- sdmpredictors::list_layers(env_datasets_vec)
DT::datatable(env_layers)
# choose layers after some inspection and perhaps consulting literature
env_layers_vec <- c("WC_alt", "WC_bio1", "WC_bio2", "ER_tri", "ER_topoWet")

# get layers
env_stack <- load_layers(env_layers_vec)

# interactive plot layers, hiding all but first (select others)
# mapview(env_stack, hide = T) # makes the html too big for Github
plot(env_stack, nc=2)

obs_hull_geo  <- file.path(dir_data, "obs_hull.geojson")
env_stack_grd <- file.path(dir_data, "env_stack.grd")

if (!file.exists(obs_hull_geo) | redo){
  # make convex hull around points of observation
  obs_hull <- sf::st_convex_hull(st_union(obs))
  
  # save obs hull
  write_sf(obs_hull, obs_hull_geo)
}
obs_hull <- read_sf(obs_hull_geo)

# show points on map
mapview(
  list(obs, obs_hull))
if (!file.exists(env_stack_grd) | redo){
  obs_hull_sp <- sf::as_Spatial(obs_hull)
  env_stack <- raster::mask(env_stack, obs_hull_sp) %>% 
    raster::crop(extent(obs_hull_sp))
  writeRaster(env_stack, env_stack_grd, overwrite=T)  
}
env_stack <- stack(env_stack_grd)

# show map
# mapview(obs) + 
#   mapview(env_stack, hide = T) # makes html too big for Github
plot(env_stack, nc=2)

###### Psuedo-Absense Points

absence_geo <- file.path(dir_data, "absence.geojson")
pts_geo     <- file.path(dir_data, "pts.geojson")
pts_env_csv <- file.path(dir_data, "pts_env.csv")


if (!file.exists(absence_geo) | redo){
  # get raster count of observations
  r_obs <- rasterize(
    sf::as_Spatial(obs), env_stack[[1]], field=1, fun='count')
  
  # show map
  # mapview(obs) + 
  #   mapview(r_obs)
  
  # create mask for 
  r_mask <- mask(env_stack[[1]] > -Inf, r_obs, inverse=T)
  
  # generate random points inside mask
  absence <- dismo::randomPoints(r_mask, nrow(obs)) %>% 
    as_tibble() %>% 
    st_as_sf(coords = c("x", "y"), crs = 4326)
  
  write_sf(absence, absence_geo, delete_dsn=T)
}
absence <- read_sf(absence_geo)

# show map of presence, ie obs, and absence
mapview(obs, col.regions = "green") + 
  mapview(absence, col.regions = "gray")
if (!file.exists(pts_env_csv) | redo){

  # combine presence and absence into single set of labeled points 
  pts <- rbind(
    obs %>% 
      mutate(
        present = 1) %>% 
      select(present, key),
    absence %>% 
      mutate(
        present = 0,
        key     = NA)) %>% 
    mutate(
      ID = 1:n()) %>% 
    relocate(ID)
  write_sf(pts, pts_geo, delete_dsn=T)

  # extract raster values for points
  pts_env <- raster::extract(env_stack, as_Spatial(pts), df=TRUE) %>% 
    tibble() %>% 
    # join present and geometry columns to raster value results for points
    left_join(
      pts %>% 
        select(ID, present),
      by = "ID") %>% 
    relocate(present, .after = ID) %>% 
    # extract lon, lat as single columns
    mutate(
      #present = factor(present),
      lon = st_coordinates(geometry)[,1],
      lat = st_coordinates(geometry)[,2]) %>% 
    select(-geometry)
  write_csv(pts_env, pts_env_csv)
}
pts_env <- read_csv(pts_env_csv)

pts_env %>% 
  # show first 10 presence, last 10 absence
  slice(c(1:10, (nrow(pts_env)-9):nrow(pts_env))) %>% 
  DT::datatable(
    rownames = F,
    options = list(
      dom = "t",
      pageLength = 20))

Term Plots

pts_env %>% 
  select(-ID) %>% 
  mutate(
    present = factor(present)) %>% 
  pivot_longer(-present) %>% 
  ggplot() +
  geom_density(aes(x = value, fill = present)) + 
  scale_fill_manual(values = alpha(c("gray", "green"), 0.5)) +
  scale_x_continuous(expand=c(0,0)) +
  scale_y_continuous(expand=c(0,0)) +
  theme_bw() + 
  facet_wrap(~name, scales = "free") +
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0))

Lab 1b: Logistic Regression

Explore (cont’d)

# Look at the data in a dataframe.

datatable(pts_env, rownames = F)
# Now look at it in a pairs plot.

GGally::ggpairs(
  select(pts_env, -ID),
  aes(color = factor(present)))

Logistic Regression

Setup Data

# setup model data
d <- pts_env %>% 
  select(-ID) %>%  # remove terms we don't want to model
  tidyr::drop_na() # drop the rows with NA values
nrow(d)
## [1] 7551

Linear Model

# fit a linear model
mdl <- lm(present ~ ., data = d)
summary(mdl)
## 
## Call:
## lm(formula = present ~ ., data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2046 -0.3802 -0.0042  0.4029  1.0084 
## 
## Coefficients:
##                Estimate  Std. Error t value             Pr(>|t|)    
## (Intercept)  4.74328211  0.18302368  25.916 < 0.0000000000000002 ***
## WC_alt      -0.00025023  0.00002043 -12.249 < 0.0000000000000002 ***
## WC_bio1     -0.03670053  0.00367575  -9.985 < 0.0000000000000002 ***
## WC_bio2     -0.05703474  0.00337749 -16.887 < 0.0000000000000002 ***
## ER_tri      -0.00183074  0.00025146  -7.280    0.000000000000367 ***
## ER_topoWet  -0.12936781  0.00595308 -21.731 < 0.0000000000000002 ***
## lon         -0.00822231  0.00055788 -14.738 < 0.0000000000000002 ***
## lat         -0.05503729  0.00334892 -16.434 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4407 on 7543 degrees of freedom
## Multiple R-squared:  0.224,  Adjusted R-squared:  0.2233 
## F-statistic:   311 on 7 and 7543 DF,  p-value: < 0.00000000000000022
y_predict <- predict(mdl, d, type="response")
y_true    <- d$present

range(y_predict)
## [1] -0.1133006  1.2159379
range(y_true)
## [1] 0 1

Generalized Linear Model

# fit a generalized linear model with a binomial logit link function
mdl <- glm(present ~ ., family = binomial(link="logit"), data = d)
summary(mdl)
## 
## Call:
## glm(formula = present ~ ., family = binomial(link = "logit"), 
##     data = d)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7173  -0.9518  -0.3569   0.9778   2.2628  
## 
## Coefficients:
##              Estimate Std. Error z value             Pr(>|z|)    
## (Intercept) 22.054337   1.048234  21.040 < 0.0000000000000002 ***
## WC_alt      -0.001326   0.000113 -11.739 < 0.0000000000000002 ***
## WC_bio1     -0.200805   0.020279  -9.902 < 0.0000000000000002 ***
## WC_bio2     -0.296798   0.018386 -16.143 < 0.0000000000000002 ***
## ER_tri      -0.010043   0.001334  -7.529    0.000000000000051 ***
## ER_topoWet  -0.655803   0.032787 -20.002 < 0.0000000000000002 ***
## lon         -0.043951   0.003068 -14.324 < 0.0000000000000002 ***
## lat         -0.290039   0.018853 -15.384 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 10467.9  on 7550  degrees of freedom
## Residual deviance:  8588.5  on 7543  degrees of freedom
## AIC: 8604.5
## 
## Number of Fisher Scoring iterations: 4
y_predict <- predict(mdl, d, type="response")

range(y_predict)
## [1] 0.04340474 0.97714602
# show term plots
termplot(mdl, partial.resid = TRUE, se = TRUE, main = F, ylim="free")

Generalized Additive Model

librarian::shelf(mgcv)

# fit a generalized additive model with smooth predictors
mdl <- mgcv::gam(
  formula = present ~ s(WC_alt) + s(WC_bio1) + 
    s(WC_bio2) + s(ER_tri) + s(ER_topoWet) + s(lon) + s(lat), 
  family = binomial, data = d)
summary(mdl)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## present ~ s(WC_alt) + s(WC_bio1) + s(WC_bio2) + s(ER_tri) + s(ER_topoWet) + 
##     s(lon) + s(lat)
## 
## Parametric coefficients:
##             Estimate Std. Error z value            Pr(>|z|)    
## (Intercept) -0.45748    0.05898  -7.757 0.00000000000000871 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df Chi.sq              p-value    
## s(WC_alt)     8.258  8.848 362.52 < 0.0000000000000002 ***
## s(WC_bio1)    8.525  8.841 499.93 < 0.0000000000000002 ***
## s(WC_bio2)    5.705  6.733 116.01 < 0.0000000000000002 ***
## s(ER_tri)     7.846  8.670  54.80 < 0.0000000000000002 ***
## s(ER_topoWet) 8.667  8.964  42.97           0.00000146 ***
## s(lon)        8.376  8.878 363.69 < 0.0000000000000002 ***
## s(lat)        8.358  8.888 248.82 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.501   Deviance explained = 43.2%
## UBRE = -0.19737  Scale est. = 1         n = 7551
# show term plots
plot(mdl, scale=0)

Maxent (Maximum Entropy)

# load extra packages
librarian::shelf(
  maptools, sf)

mdl_maxent_rds <- file.path(dir_data, "mdl_maxent.rds")

# show version of maxent
if (!interactive())
  maxent()
## This is MaxEnt version 3.4.3
# get environmental rasters
# NOTE: the first part of Lab 1. SDM - Explore got updated to write this clipped environmental raster stack
env_stack_grd <- file.path(dir_data, "env_stack.grd")
env_stack <- stack(env_stack_grd)
plot(env_stack, nc=2)

# get presence-only observation points (maxent extracts raster values for you)
obs_geo <- file.path(dir_data, "obs.geojson")
obs_sp <- read_sf(obs_geo) %>% 
  sf::as_Spatial() # maxent prefers sp::SpatialPoints over newer sf::sf class

# fit a maximum entropy model
if (!file.exists(mdl_maxent_rds)){
  mdl <- maxent(env_stack, obs_sp)
  readr::write_rds(mdl, mdl_maxent_rds)
}
mdl <- read_rds(mdl_maxent_rds)

# plot variable contributions per predictor
plot(mdl)

# plot term plots
response(mdl)

# predict
y_predict <- predict(env_stack, mdl) #, ext=ext, progress='')

plot(y_predict, main='Maxent, raw prediction')
data(wrld_simpl, package="maptools")
plot(wrld_simpl, add=TRUE, border='dark grey')

Lab 1c: Decision Trees

# graphical theme
ggplot2::theme_set(ggplot2::theme_light())

# read data
pts_env <- read_csv(pts_env_csv)
d <- pts_env %>% 
  select(-ID) %>%                   # not used as a predictor x
  mutate(
    present = factor(present)) %>%  # categorical response
  na.omit()                         # drop rows with NA
skim(d)
Data summary
Name d
Number of rows 7551
Number of columns 8
_______________________
Column type frequency:
factor 1
numeric 7
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
present 0 1 FALSE 2 0: 3785, 1: 3766

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
WC_alt 0 1 715.32 690.71 -57.00 215.00 416.00 1081.00 3611.00 ▇▂▂▁▁
WC_bio1 0 1 4.28 5.30 -12.30 1.10 4.90 7.50 23.40 ▁▃▇▂▁
WC_bio2 0 1 11.64 2.64 4.00 10.30 11.60 13.00 19.90 ▁▃▇▃▁
ER_tri 0 1 42.67 45.97 0.00 7.22 22.32 69.34 274.59 ▇▂▁▁▁
ER_topoWet 0 1 10.69 1.92 6.73 8.98 10.78 12.24 15.22 ▅▇▇▇▂
lon 0 1 -105.00 21.97 -154.71 -122.12 -110.50 -85.46 -52.85 ▁▇▅▅▂
lat 0 1 48.11 7.24 33.88 43.21 46.94 53.38 66.12 ▃▇▆▃▂

Split data into Training and Testing

# create training set with 80% of full data
d_split  <- rsample::initial_split(d, prop = 0.8, strata = "present")
d_train  <- rsample::training(d_split)

# show number of rows present is 0 vs 1
table(d$present)
## 
##    0    1 
## 3785 3766
table(d_train$present)
## 
##    0    1 
## 3028 3012

Decision Trees

Partition, depth = 1
# run decision stump model
mdl <- rpart(
  present ~ ., data = d_train, 
  control = list(
    cp = 0, minbucket = 5, maxdepth = 1))
mdl
## n= 6040 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 6040 3012 0 (0.5013245 0.4986755)  
##   2) WC_bio1< 1.95 1766  393 0 (0.7774632 0.2225368) *
##   3) WC_bio1>=1.95 4274 1655 1 (0.3872251 0.6127749) *
# plot tree 
par(mar = c(1, 1, 1, 1))
rpart.plot(mdl)

Partition, depth = default
# decision tree with defaults
mdl <- rpart(present ~ ., data = d_train)
mdl
## n= 6040 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 6040 3012 0 (0.50132450 0.49867550)  
##    2) WC_bio1< 1.95 1766  393 0 (0.77746319 0.22253681)  
##      4) lat>=47.23326 1628  293 0 (0.82002457 0.17997543)  
##        8) lon>=-146.8239 1495  204 0 (0.86354515 0.13645485) *
##        9) lon< -146.8239 133   44 1 (0.33082707 0.66917293) *
##      5) lat< 47.23326 138   38 1 (0.27536232 0.72463768) *
##    3) WC_bio1>=1.95 4274 1655 1 (0.38722508 0.61277492)  
##      6) lat< 42.14012 1114  344 0 (0.69120287 0.30879713)  
##       12) ER_topoWet>=9.33 752   67 0 (0.91090426 0.08909574) *
##       13) ER_topoWet< 9.33 362   85 1 (0.23480663 0.76519337) *
##      7) lat>=42.14012 3160  885 1 (0.28006329 0.71993671)  
##       14) WC_bio2>=12.85 608  282 0 (0.53618421 0.46381579)  
##         28) ER_topoWet>=10.435 302   47 0 (0.84437086 0.15562914) *
##         29) ER_topoWet< 10.435 306   71 1 (0.23202614 0.76797386) *
##       15) WC_bio2< 12.85 2552  559 1 (0.21904389 0.78095611) *
rpart.plot(mdl)

# plot complexity parameter
plotcp(mdl)

# rpart cross validation results
mdl$cptable
##           CP nsplit rel error    xerror       xstd
## 1 0.32005312      0 1.0000000 1.0405046 0.01289211
## 2 0.14143426      1 0.6799469 0.6809429 0.01221916
## 3 0.06374502      2 0.5385126 0.5577689 0.01156177
## 4 0.03452855      3 0.4747676 0.4810757 0.01101830
## 5 0.02058433      5 0.4057105 0.4262948 0.01055674
## 6 0.01494024      6 0.3851262 0.4000664 0.01031142
## 7 0.01000000      7 0.3701859 0.3844622 0.01015733
Feature Interpretation
# caret cross validation results
mdl_caret <- train(
  present ~ .,
  data       = d_train,
  method     = "rpart",
  trControl  = trainControl(method = "cv", number = 10),
  tuneLength = 20)

ggplot(mdl_caret)

vip(mdl_caret, num_features = 40, bar = FALSE)

# Construct partial dependence plots
p1 <- partial(mdl_caret, pred.var = "lat") %>% autoplot()
p2 <- partial(mdl_caret, pred.var = "WC_bio2") %>% autoplot()
p3 <- partial(mdl_caret, pred.var = c("lat", "WC_bio2")) %>% 
  plotPartial(levelplot = FALSE, zlab = "yhat", drape = TRUE, 
              colorkey = TRUE, screen = list(z = -20, x = -60))

# Display plots side by side
gridExtra::grid.arrange(p1, p2, p3, ncol = 3)

Random Forest

Fit
# number of features
n_features <- length(setdiff(names(d_train), "present"))

# fit a default random forest model
mdl_rf <- ranger(present ~ ., data = d_train)

# get out of the box RMSE
(default_rmse <- sqrt(mdl_rf$prediction.error))
## [1] 0.3117458
Feature Interpretation
# re-run model with impurity-based variable importance
mdl_impurity <- ranger(
  present ~ ., data = d_train,
  importance = "impurity")

# re-run model with permutation-based variable importance
mdl_permutation <- ranger(
  present ~ ., data = d_train,
  importance = "permutation")
p1 <- vip::vip(mdl_impurity, bar = FALSE)
p2 <- vip::vip(mdl_permutation, bar = FALSE)

gridExtra::grid.arrange(p1, p2, nrow = 1)

Lab 1d: Evaluate Models

# read points of observation: presence (1) and absence (0)
pts <- read_sf(pts_geo)

Split observations into Training and Testing

# create training set with 80% of full data
pts_split  <- rsample::initial_split(
  pts, prop = 0.8, strata = "present")
pts_train  <- rsample::training(pts_split)
pts_test   <- rsample::testing(pts_split)

pts_train_p <- pts_train %>% 
  filter(present == 1) %>% 
  as_Spatial()
pts_train_a <- pts_train %>% 
  filter(present == 0) %>% 
  as_Spatial()

Calibrate: Model Selection

# show pairs plot before multicollinearity reduction with vifcor()
pairs(env_stack)

# calculate variance inflation factor per predictor, a metric of multicollinearity between variables
vif(env_stack)
##    Variables      VIF
## 1     WC_alt 3.724311
## 2    WC_bio1 1.697796
## 3    WC_bio2 3.465192
## 4     ER_tri 4.392701
## 5 ER_topoWet 4.091482
# stepwise reduce predictors, based on a max correlation of 0.7 (max 1)
v <- vifcor(env_stack, th=0.7) 
v
## 1 variables from the 5 input variables have collinearity problem: 
##  
## ER_tri 
## 
## After excluding the collinear variables, the linear correlation coefficients ranges between: 
## min correlation ( WC_bio1 ~ WC_alt ):  0.02400813 
## max correlation ( ER_topoWet ~ WC_alt ):  -0.558568 
## 
## ---------- VIFs of the remained variables -------- 
##    Variables      VIF
## 1     WC_alt 3.084384
## 2    WC_bio1 1.584131
## 3    WC_bio2 2.847255
## 4 ER_topoWet 2.058726
# reduce enviromental raster stack by 
env_stack_v <- usdm::exclude(env_stack, v)

# show pairs plot after multicollinearity reduction with vifcor()
pairs(env_stack_v)

# fit a maximum entropy model
if (!file.exists(mdl_maxv_rds)){
  mdl_maxv <- maxent(env_stack_v, sf::as_Spatial(pts_train))
  readr::write_rds(mdl_maxv, mdl_maxv_rds)
}
mdl_maxv <- read_rds(mdl_maxv_rds)

# plot variable contributions per predictor
plot(mdl_maxv)

# plot term plots
response(mdl_maxv)

# predict
y_maxv <- predict(env_stack, mdl_maxv) #, ext=ext, progress='')

plot(y_maxv, main='Maxent, raw prediction')
data(wrld_simpl, package="maptools")
plot(wrld_simpl, add=TRUE, border='dark grey')

Evaluate: Model Performance

Area Under the Curve (AUC), Reciever Operater Characteristic (ROC) Curve and Confusion Matrix
pts_test_p <- pts_test %>% 
  filter(present == 1) %>% 
  as_Spatial()
pts_test_a <- pts_test %>% 
  filter(present == 0) %>% 
  as_Spatial()

y_maxv <- predict(mdl_maxv, env_stack)
#plot(y_maxv)

e <- dismo::evaluate(
  p     = pts_test_p,
  a     = pts_test_a, 
  model = mdl_maxv,
  x     = env_stack)
e
## class          : ModelEvaluation 
## n presences    : 756 
## n absences     : 758 
## AUC            : 0.8389856 
## cor            : 0.5861861 
## max TPR+TNR at : 0.656592
plot(e, 'ROC')

thr <- threshold(e)[['spec_sens']]
thr
## [1] 0.656592
p_true <- na.omit(raster::extract(y_maxv, pts_test_p) >= thr)
a_true <- na.omit(raster::extract(y_maxv, pts_test_a) < thr)

# (t)rue/(f)alse (p)ositive/(n)egative rates
tpr <- sum(p_true)/length(p_true)
fnr <- sum(!p_true)/length(p_true)
fpr <- sum(!a_true)/length(a_true)
tnr <- sum(a_true)/length(a_true)

matrix(
  c(tpr, fnr,
    fpr, tnr), 
  nrow=2, dimnames = list(
    c("present_obs", "absent_obs"),
    c("present_pred", "absent_pred")))
##             present_pred absent_pred
## present_obs    0.8148148   0.2493404
## absent_obs     0.1851852   0.7506596
# add point to ROC plot
plot(e, 'ROC')
points(fpr, tpr, pch=23, bg="blue")

plot(y_maxv > thr)